package mosdi.subcommands;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import mosdi.discovery.MotifFinder;
import mosdi.discovery.ObjectiveFunction;
import mosdi.discovery.objectives.OccurrenceCountObjective;
import mosdi.discovery.objectives.SequenceCountObjective;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.IIDTextModel;
import mosdi.fa.MarkovianTextModel;
import mosdi.index.SuffixTree;
import mosdi.index.SuffixTreeWalker;
import mosdi.util.Alphabet;
import mosdi.util.BitArray;
import mosdi.util.Iupac;
import mosdi.util.IupacStringConstraints;
import mosdi.util.Log;
import mosdi.util.NamedSequence;
import mosdi.util.SequenceUtils;
import mosdi.util.iterators.IupacPatternIterator;
import mosdi.util.iterators.LexicographicalIterator;
import mosdi.util.iterators.RangeIterator;

public class PValueBoundsSubcommand extends Subcommand {

	@Override
	public String usage() {
		return
		super.usage()+" [options] <pattern-length> <objective> <fasta-file>\n" +
		"\n" +
		" General options\n" +
		"  -i: replace unknown characters in DNA sequences by $\n" +
		"\n" +
		" Controlling MOTIF SPACE to be searched:\n" +
		"  -m: minimum number of letters from {A,C,G,T},{W,S,R,Y,K,M},{B,D,H,V},{N}, default: \"0,0,0,0\"\n" +
		"  -M: maximum number of letters from {A,C,G,T},{W,S,R,Y,K,M},{B,D,H,V},{N}, default: \"<length>,0,0,<length>/2\"\n" +
		"  -l <lower-bound>: only consider patterns s.t. lower-bound<=pattern\n" +
		"  -u <upper-bound>: only consider patterns s.t. pattern<upper-bound\n" +
		"  -r: consider reverse complement\n" +
		"\n" +
		" Allowed OBJECTIVES to be given as <objective>:\n" +
		"   \"occ-count\": total number of occurrences\n" +
		"   \"seq-count\": number of sequences with at least one occurrence\n" +
		"\n" +
		" Further options\n" +
		"  -C <good-enough-bound>: when clump size bounds are computed dynamically, no new calculations\n" +
		"                          are made when a bound <=good_enough_bound is known (default: 1.01)\n"+
		"  -O <model-order>: use background model of given order (default: 0)\n" +
		"  -d: use detailed bounds on conditional probabilities (takes longer to precompute,\n" +
		"      but yields better bounds)";
	}
	
	@Override
	public String description() {
		return "Enumerates motifs and calculates p-value bounds for all motif prefixes (useful to evaluate bound quality).";
	}

	@Override
	public String name() {
		return "p-value-bounds";
	}

	private static class SearchStateImpl implements MotifFinder.SearchState {
		private IupacStringConstraints constraints;
		private int[] pattern;
		private boolean considerReverse;
		public SearchStateImpl(IupacStringConstraints constraints,	int[] pattern, boolean considerReverse) {
			this.constraints = constraints;
			this.pattern = pattern;
			this.considerReverse = considerReverse;
		}
		@Override
		public int[] getAbelianPattern() { throw new UnsupportedOperationException(); }
		@Override
		public BitArray[] getGeneralizedAlphabet() { return Iupac.asGeneralizedAlphabet(); }
		@Override
		public IupacStringConstraints getIupacConstraints() { return constraints; }
		@Override
		public int getMultiplicityLeft() { throw new UnsupportedOperationException(); }
		@Override
		public int[] getPattern() { return pattern;	}
		public void setPattern(int[] pattern) { this.pattern = pattern;	}
		@Override
		public int getStringLength() { return pattern.length; }
		@Override
		public boolean isReverseConsidered() { return considerReverse; }
		@Override
		public void requestMultiplicity() {}
	}
	
	@Override
	public int run(String[] args) {
		parseOptions(args, 3, "m:M:O:l:u:rC:id");

		// Option dependencies
		// --- none ---

		// Mandatory arguments
		int patternLength = getIntArgument(0);
		String objectiveName = getStringArgument(1);
		String filename = getStringArgument(2);

		// Options
		final Alphabet iupacAlphabet = Alphabet.getIupacAlphabet();
		final Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		boolean considerReverse = getBooleanOption("r", false);
		int[] minFrequencies = {0,0,0,0};
		minFrequencies = getRangedIntTupleOption("m", 4, 0, patternLength, minFrequencies);
		int[] maxFrequencies = {patternLength,0,0,patternLength/2};
		maxFrequencies = getRangedIntTupleOption("M", 4, 0, patternLength, maxFrequencies);
		int modelOrder = getNonNegativeIntOption("O", 0);
		int[] lowerBound = getStringOption("l", iupacAlphabet, null);
		int[] upperBound = getStringOption("u", iupacAlphabet, null);
		double goodEnoughClumpSizeBound = getRangedDoubleOption("C", 1.0, Double.POSITIVE_INFINITY, 1.01);
		boolean replaceUnknownByMinusOne = getBooleanOption("i", false);
		boolean useDetailedBounds = getBooleanOption("d", false);

		List<NamedSequence> namedSequences = null;
		
		if (!(objectiveName.equals("occ-count") || objectiveName.equals("seq-count"))) {
			Log.errorln("Unknown objective: "+objectiveName);
			System.exit(1);
		}
		if (objectiveName.equals("seq-count") && replaceUnknownByMinusOne) {
			Log.errorln("Objective \"seq-count\" may not be used together with option -i.");
			System.exit(1);
		}
		Log.startTimer();
		try{
			namedSequences = SequenceUtils.readFastaFile(filename, dnaAlphabet, replaceUnknownByMinusOne);
		} catch (Exception e) {
			Log.errorln(e.toString());
			System.exit(1);
		}
		Log.stopTimer("Reading FASTA file");
		int effectiveLength = 0;
		for (NamedSequence ns : namedSequences) effectiveLength+=ns.length()-patternLength+1;
		int[] sequenceLengths = null;
		if (objectiveName.equals("seq-count")) {
			sequenceLengths = new int[namedSequences.size()];
			for (int i=0; i<namedSequences.size(); ++i) {
				sequenceLengths[i] = namedSequences.get(i).length();
			}
		}
		Log.startTimer();
		List<int[]> sequences = new ArrayList<int[]>(namedSequences.size());
		for (NamedSequence ns : namedSequences) sequences.add(ns.getSequence());
		FiniteMemoryTextModel textModel = (modelOrder==0)?new IIDTextModel(dnaAlphabet.size(), sequences):new MarkovianTextModel(modelOrder,dnaAlphabet.size(),sequences);
		Log.stopTimer("Estimating text model");
		
		SuffixTree suffixTree = new SuffixTree();
		// allow sequences to be garbage collected
		sequences = null;
		int[] concatSequence = SequenceUtils.concatSequences(namedSequences, considerReverse);
		namedSequences = null;
		suffixTree.buildTree(concatSequence, dnaAlphabet.size(), patternLength);
		concatSequence = null;
		int[] occurrenceCountAnnotation = null;
		if (objectiveName.equals("occ-count")) {
			occurrenceCountAnnotation = suffixTree.calcOccurrenceCountAnnotation();
		}
		BitArray[] sequenceOccurrenceAnnotation = null;
		if (objectiveName.equals("seq-count")) {
			List<int[]> l = SequenceUtils.appendMinusOnes(namedSequences, considerReverse);
			sequenceOccurrenceAnnotation = suffixTree.calcSequenceOccurrenceAnnotation(l);
			
		}

		Log.startTimer();
		BitArray[] generalizedAlphabet = Iupac.asGeneralizedAlphabet();
		int[] maxMatchCountAnnotation = suffixTree.calcMaxMatchCountAnnotation(patternLength);
		ObjectiveFunction objective = null;
		if (objectiveName.equals("occ-count")) {
			OccurrenceCountObjective o = new OccurrenceCountObjective(textModel, occurrenceCountAnnotation, maxMatchCountAnnotation, effectiveLength, useDetailedBounds);
			o.setGoodEnoughClumpSizeBound(goodEnoughClumpSizeBound);
			objective = o;
		}
		if (objectiveName.equals("seq-count")) {
			SequenceCountObjective o = new SequenceCountObjective(textModel, sequenceLengths, sequenceOccurrenceAnnotation, useDetailedBounds);
			o.setGoodEnoughClumpSizeBound(goodEnoughClumpSizeBound);
			objective = o;
		}
		
		IupacStringConstraints constraints = new IupacStringConstraints(minFrequencies,maxFrequencies);
		LexicographicalIterator iterator = new IupacPatternIterator(patternLength, constraints);
		if ((lowerBound!=null) || (upperBound!=null)) {
			iterator = new RangeIterator(iterator, lowerBound, upperBound);
		}
		// bounds[i] contains the p-value (lower) bound computed from length-i prefix,
		double[] bounds = new double[patternLength+1];
		double[] minima = new double[patternLength+1];
		Arrays.fill(minima, Double.POSITIVE_INFINITY);
		int[] previousPattern = null;
		int[] pattern = null;
		SearchStateImpl searchState = new SearchStateImpl(constraints, iterator.peek(), considerReverse);
		objective.initialize(searchState);
		SuffixTreeWalker walker = suffixTree.getWalker();
		while (iterator.hasNext()) {
			previousPattern = pattern;
			pattern = iterator.next();
			searchState.setPattern(pattern);
			int k = iterator.getLeftmostChangedPosition();
			walker.backward(k);
			if (previousPattern!=null) {
				printBoundQuality(iupacAlphabet.buildString(previousPattern),bounds,minima,k);
			}
			Arrays.fill(minima,k+1,minima.length,Double.POSITIVE_INFINITY);
			int[] nodes = null;
			for (; k<patternLength; ++k) {
				nodes = walker.forward(generalizedAlphabet[pattern[k]]);
				if (nodes.length==0) {
					iterator.skip(k);
					break;
				}
				objective.updatePattern(pattern[k], k);
				bounds[k+1] = objective.pValueLowerBound(k+1, nodes);
				Log.printf(Log.Level.STANDARD, ">> %s %d %e\n", iupacAlphabet.buildString(Arrays.copyOfRange(pattern,0,k+1)), k+1, bounds[k+1]);
			}
			if (nodes.length>0) {
				double pValue = objective.evaluate(nodes, 1.0).getPValue();
				for (int i=0; i<minima.length; ++i){
					minima[i] = Math.min(minima[i], pValue);
				}
				Log.printf(Log.Level.STANDARD, ">= %s %d %e\n", iupacAlphabet.buildString(pattern), patternLength, pValue);
			}
		}
		printBoundQuality(iupacAlphabet.buildString(previousPattern),bounds,minima,0);
		return 0;
	}
	
	private static void printBoundQuality(String pattern, double[] bounds, double[] minima, int leftMostChanged) {
		for (int prefixLength=leftMostChanged+1; prefixLength<=pattern.length(); ++prefixLength) {
			if (Double.isInfinite(minima[prefixLength])) continue;
			Log.printf(Log.Level.STANDARD, ">! %s %d %e %e %e\n", pattern.substring(0,prefixLength), prefixLength, bounds[prefixLength], minima[prefixLength], minima[prefixLength]/bounds[prefixLength]);
			if (minima[prefixLength]*1.00000001<bounds[prefixLength]) throw new IllegalStateException("THIS IS A BUG!");
		}
	}

}
